/*** 
This do-file processes data from the economic tracker and other public data
sources that are used in the table 'Mechanisms Underlying the Persistent 
Reduction in Low-Wage Employment: Hysteresis vs. Current Conditions'.
***/

*-------------------------------------------------------------------------------
* Set up
*-------------------------------------------------------------------------------

* Set $root 
project figstabs, root
if (r(buildrunning)==0) include "${root}/code/config_interactive.do"

* Set globals
project, uses("${root}/code/set_globals.do")
include "${root}/code/set_globals.do"

*-------------------------------------------------------------------------------
* Import Population by Race from NHGIS 
*-------------------------------------------------------------------------------

project, uses("${root}/data/dvc/NHGIS/nhgis0002_ds181_2010_county.csv")
import delimited "${root}/data/dvc/NHGIS/nhgis0002_ds181_2010_county.csv", clear

rename (lgh001 lghaaa001 lghaab001 lghaac001 lghaad001 lghaae001 lghacf001 lghaeq001 lghafo001 lghagn001 lghahj001 lghaig001 lghaih001 lghaks001) ///
       (county_pop white_alone white black_alone black indian_alone indian asian_alone asian pacific_alone pacific other_alone other hispanic)

* Calculate shares
foreach var in white_alone white black_alone black indian_alone indian asian_alone asian pacific_alone pacific other_alone other hispanic {
	gen share_`var' = `var' / county_pop
}

* Create countyfips
gen countyfips = substr(gisjoin, 2, strlen(gisjoin) - 2)
replace countyfips = substr(countyfips, 1, 2) + substr(countyfips, 4, strlen(countyfips))
destring countyfips, replace

* Keep relevant variables
keep countyfips share_*

* Save temporarily
tempfile race_shares
save `race_shares'

*-------------------------------------------------------------------------------
* Import share of foreign born from ACS (2019) 
*-------------------------------------------------------------------------------

project, uses("${root}/data/dvc/ACS 2014-2018 5-Year County/Share Foreign Born/ACSDP5Y2019.DP02_data_with_overlays_2022-04-09T183038.csv")
import delimited "${root}/data/dvc/ACS 2014-2018 5-Year County/Share Foreign Born/ACSDP5Y2019.DP02_data_with_overlays_2022-04-09T183038.csv", clear varnames(2)

keep geographicareaname id v371 
rename (geographicareaname v371) (county_name percent_foreign_born)

* Format percentage of foreign born var
replace percent_foreign_born = "" if percent_foreign_born == "null"
destring percent_foreign_born, replace

* Format countyfips var
gen countyfips = substr(id, strlen(id) - 4, strlen(id))
destring countyfips, replace
gduplicates drop countyfips, force

* Confirm structure
gisid countyfips

* Drop Puerto Rico 
drop if floor(countyfips / 1000) == 72

* Save temporarily
tempfile foreign
save `foreign'

*-------------------------------------------------------------------------------
* Import and clean age and gender stats ACS (2015-2019) 
*-------------------------------------------------------------------------------

project, uses("${root}/data/dvc/NHGIS/nhgis0003_ds244_20195_county.csv")
import delimited "${root}/data/dvc/NHGIS/nhgis0003_ds244_20195_county.csv", clear

rename (alt0e001 alt0e002 alt0e026) (total male_total female_total)

gen female_25_54_total = alt0e035 + alt0e036 + alt0e037 + alt0e038 + alt0e039 + alt0e040
gen male_25_54_total = alt0e011 + alt0e012 + alt0e013 + alt0e014 + alt0e015 + alt0e016
gen total_25_54 = female_25_54_total + male_25_54_total 

gen female_25_54_share = female_25_54_total / female_total
gen male_25_54_share = male_25_54_total / male_total
gen pop_25_54_share = total_25_54 / total

gen pop_25_54_female_share = female_25_54_total/total_25_54
gen female_share = female_total / total

keep gisjoin year stusab regiona divisiona state statea county countya cousuba *_share
rename (*_share) (*)

label variable pop_25_54 "Share of population between ages 25 and 54"
label variable female_25_54 "Share of female opulation between ages 25 and 54"
label variable male_25_54 "Share of male population between ages 25 and 54"
label variable female "Share of population that is female"
label variable pop_25_54_female "Share of population aged 25-54 that is female"

* Create countyfips
gen countyfips = substr(gisjoin, 2, strlen(gisjoin) - 2)
replace countyfips = substr(countyfips, 1, 2) + substr(countyfips, 4, strlen(countyfips))
destring countyfips, replace

tempfile age_gender
save `age_gender'

*-------------------------------------------------------------------------------
* Import basic covariates from ACS (2014-2018)
*-------------------------------------------------------------------------------

project, uses("${root}/data/derived/ACS 2014-2018 5-Year County/ACS 2014-2018 County.dta")
use "${root}/data/derived/ACS 2014-2018 5-Year County/ACS 2014-2018 County.dta", clear 
rename county_fips countyfips
rename (pop_2014_2018_est medhhinc_2014_2018_est med_2br_2014_2018_est gini_2014_2018_est poorshare_2014_2018_est) ///
		(pop_2018 medhhinc_2018 med_2br_2018 gini_2018 poorshare_2018) 
keep countyfips pop_2018 medhhinc_2018 med_2br_2018 gini_2018 poorshare_2018

* Merge foreign born tempfile
merge 1:1 countyfips using `foreign', assert(3) nogen
* Merge race shares tempfile
merge 1:1 countyfips using `race_shares', nogen 
* Merge age and gender
merge 1:1 countyfips using `age_gender', nogen

* Drop unnecessary variables
drop county_name id gisjoin year stusab regiona divisiona state statea county countya cousuba
* Save covariates
tempfile covars
save `covars'

*-------------------------------------------------------------------------------
* Prepare tracker data
*-------------------------------------------------------------------------------

* COVID variables
project, uses("${root}/data/web/data/COVID - County - Daily 2020.csv")
import delimited using "${root}/data/web/data/COVID - County - Daily 2020.csv", clear

tempfile covid_2020
save `covid_2020'

project, uses("${root}/data/web/data/COVID - County - Daily 2021.csv.gz")
gzimport delimited using "${root}/data/web/data/COVID - County - Daily 2021.csv.gz", clear

tempfile covid_2021
save `covid_2021'

clear
append using `covid_2020'
append using `covid_2021'

* Convert from 7-day moving sum to 7-day moving average
foreach var in new_case_count new_death_count {
	replace `var' = `var' / 7
}

gen date = mdy(month, day, year)

* Cumulative cases
gegen cumcases_jul2020 = mean(case_count) if month(date) == 7 & year(date) == 2020, by(countyfips)
gegen cumcases_dec2020 = mean(case_count) if month(date) == 12 & year(date) == 2020, by(countyfips)
gegen cumcases_jul2021 = mean(case_count) if month(date) == 7 & year(date) == 2021, by(countyfips)
gegen cumcases_dec2021 = mean(case_count) if month(date) == 12 & year(date) == 2021, by(countyfips)

* Cumulative deaths
gegen cumdeaths_jul2020 = mean(death_count) if month(date) == 7 & year(date) == 2020, by(countyfips)
gegen cumdeaths_dec2020 = mean(death_count) if month(date) == 12 & year(date) == 2020, by(countyfips)
gegen cumdeaths_jul2021 = mean(death_count) if month(date) == 7 & year(date) == 2021, by(countyfips)
gegen cumdeaths_dec2021 = mean(death_count) if month(date) == 12 & year(date) == 2021, by(countyfips)

* Current cases and deaths
gegen currcases_end2021 = mean(new_case_count) if month(date) >= 10 & month(date) <= 12 & year(date) == 2021, by(countyfips)
gegen currdeaths_end2021 = mean(new_death_count) if month(date) >= 10 & month(date) <= 12 & year(date) == 2021, by(countyfips)

* Deaths in Q2 of 2020
gegen deaths_q2_2020 = sum(new_death_count) if inrange(date, mdy(4, 1, 2020), mdy(6, 30, 2020)), by(countyfips)
gegen deaths_q4_2020 = sum(new_death_count) if inrange(date, mdy(10, 1, 2020), mdy(12, 31, 2020)), by(countyfips)

gcollapse (mean) cum* curr* deaths*, by(countyfips) 

tempfile covid
save `covid'

* UI weeks
project, uses("${root}/data/dvc/UI Policy/ui_benefit_weeks_by_state.xlsx")
import excel  "${root}/data/dvc/UI Policy/ui_benefit_weeks_by_state.xlsx", clear firstrow

rename (fips max_number_weeks_dec21 max_number_weeks_jan20) (statefips ui_max_weeks_dec21 ui_max_weeks_jan20)

keep statefips ui_max_weeks_dec21 ui_max_weeks_jan20

gisid statefips

tempfile ui_weeks
save `ui_weeks'

* Small business revenue - county level
project, uses("${root}/data/web/data/Womply - County - Weekly.csv")
import delimited "${root}/data/web/data/Womply - County - Weekly.csv", clear

gisid countyfips year month day_endofweek

gen date = mdy(month, day_endofweek, year)

local i 1
foreach month in jan feb mar apr may jun jul aug sep oct nov dec {
	gegen revenue_`month'2020 = mean(revenue_all) if month(date) == `i' & year(date) == 2020, by(countyfips)
	gegen revenue_`month'2021 = mean(revenue_all) if month(date) == `i' & year(date) == 2021, by(countyfips)
	
	local i = `i' + 1
}

gcollapse (mean) *2020 *2021, by(countyfips)

gisid countyfips

tempfile revenue
save `revenue'

* Employment
project, uses("${root}/data/web/data/Employment - County - Weekly.csv")
import delimited "${root}/data/web/data/Employment - County - Weekly.csv", clear

gen date = mdy(month, day_endofweek, year) 

foreach var in emp emp_incq2 emp_incq3 emp_incq4 emp_incq1 emp_incbelowmed emp_incabovemed {
	
	gegen `var'_jan2020 = mean(`var') if month(date) == 1 & year(date) == 2020, by(countyfips)
	gegen `var'_feb2020 = mean(`var') if month(date) == 2 & year(date) == 2020, by(countyfips)
	gegen `var'_mar2020 = mean(`var') if month(date) == 3 & year(date) == 2020, by(countyfips)
	gegen `var'_apr2020 = mean(`var') if month(date) == 4 & year(date) == 2020, by(countyfips)
	gegen `var'_may2020 = mean(`var') if month(date) == 5 & year(date) == 2020, by(countyfips)
	gegen `var'_jun2020 = mean(`var') if month(date) == 6 & year(date) == 2020, by(countyfips)
	gegen `var'_jul2020 = mean(`var') if month(date) == 7 & year(date) == 2020, by(countyfips)
	gegen `var'_aug2020 = mean(`var') if month(date) == 8 & year(date) == 2020, by(countyfips)
	gegen `var'_sep2020 = mean(`var') if month(date) == 9 & year(date) == 2020, by(countyfips)
	gegen `var'_oct2020 = mean(`var') if month(date) == 10 & year(date) == 2020, by(countyfips)
	gegen `var'_nov2020 = mean(`var') if month(date) == 11 & year(date) == 2020, by(countyfips)
	gegen `var'_dec2020 = mean(`var') if month(date) == 12 & year(date) == 2020, by(countyfips)
	
	gegen `var'_jan2021 = mean(`var') if month(date) == 1 & year(date) == 2021, by(countyfips)
	gegen `var'_feb2021 = mean(`var') if month(date) == 2 & year(date) == 2021, by(countyfips)
	gegen `var'_mar2021 = mean(`var') if month(date) == 3 & year(date) == 2021, by(countyfips)
	gegen `var'_apr2021 = mean(`var') if month(date) == 4 & year(date) == 2021, by(countyfips)
	gegen `var'_may2021 = mean(`var') if month(date) == 5 & year(date) == 2021, by(countyfips)
	gegen `var'_jun2021 = mean(`var') if month(date) == 6 & year(date) == 2021, by(countyfips)
	gegen `var'_jul2021 = mean(`var') if month(date) == 7 & year(date) == 2021, by(countyfips)
	gegen `var'_aug2021 = mean(`var') if month(date) == 8 & year(date) == 2021, by(countyfips)
	gegen `var'_sep2021 = mean(`var') if month(date) == 9 & year(date) == 2021, by(countyfips)
	gegen `var'_oct2021 = mean(`var') if month(date) == 10 & year(date) == 2021, by(countyfips)
	gegen `var'_nov2021 = mean(`var') if month(date) == 11 & year(date) == 2021, by(countyfips)
	gegen `var'_dec2021 = mean(`var') if month(date) == 12 & year(date) == 2021, by(countyfips)
}

gcollapse (mean) *2020 *2021, by(countyfips) 


* Merge Covid
merge 1:1 countyfips using  `covid', nogen

* Merge covariates
merge 1:1 countyfips using `covars', nogen 

* Merge revenue 
merge 1:1 countyfips using `revenue', nogen 

* State
preserve

project, uses("${root}/data/web/data/GeoIDs - County.csv")
import delimited "${root}/data/web/data/GeoIDs - County.csv", clear

tempfile state
save `state'
restore

merge 1:1 countyfips using `state', nogen 

* Merge ui weeks
merge m:1 statefips using `ui_weeks', nogen


order countyfips countyname cityid cityname cz czname statename statefips stateabbrev county_pop2019 ///
	emp_jul2020 emp_incq1_jul2020 emp_incq2_jul2020 emp_incq3_jul2020 emp_incq4_jul2020 ///
	emp_dec2020 emp_incq1_dec2020 emp_incq2_dec2020 emp_incq3_dec2020 emp_incq4_dec2020 ///
	emp_jul2021 emp_incq1_jul2021 emp_incq2_jul2021 emp_incq3_jul2021 emp_incq4_jul2021 ///
	emp_dec2021 emp_incq1_dec2021 emp_incq2_dec2021	emp_incq3_dec2021 emp_incq4_dec2021 ///
	cumcases_jul2020 cumcases_dec2020 cumcases_jul2021 cumcases_dec2021 cumdeaths_jul2020 ///
	cumdeaths_dec2020 cumdeaths_jul2021 cumdeaths_dec2021 deaths_q2_2020 deaths_q4_2020 ///
	ui_max_weeks_dec21 revenue_jul2020 revenue_dec2021 pop_2018 medhhinc_2018 med_2br_2018 ///
	gini_2018 poorshare_2018 percent_foreign_born share_white_alone ///
	share_white share_black_alone share_black share_indian_alone share_indian ///
	share_asian_alone share_asian share_pacific_alone share_pacific share_other_alone ///
	share_other share_hispanic female_25_54 male_25_54 pop_25_54 pop_25_54_female female
	
drop city*

* Save final dataset
save "${root}/results/Employment, COVID, Covars - County.dta" , replace
project, creates("${root}/results/Employment, COVID, Covars - County.dta")

*-------------------------------------------------------------------------------
* Stats on mean number of Covid cases in Oct-Dec 2021 per 100,000 population
*-------------------------------------------------------------------------------

* Collapse to national level 
drop if mi(countyfips)
gisid countyfips
gcollapse (rawsum) currcases_end2021 pop_2018

* Mean national Covid cases per 100,000 pop in Oct-Dec 2021
gen cases_per_100K_pop = currcases_end2021 / pop_2018 * 100000
sum cases_per_100K_pop
local cases_per_100K_pop: di %3.0f r(mean)

* Create folders for output numbers for paper 
cap mkdir "${root}/results"
cap mkdir "${root}/results/paper numbers"
cap mkdir "${root}/results/paper numbers/Covid"

* Output numbers
cap erase "${root}/results/paper numbers/Covid/Mean National Covid Cases Oct-Dec 2021 per 100K pop.yaml"

yamlout using "${root}/results/paper numbers/Covid/Mean National Covid Cases Oct-Dec 2021 per 100K pop.yaml",  ///
	key("cases_per_100K_pop") ///
	comment("Mean National Covid Cases Oct-Dec 2021 per 100K pop") ///
	value(`cases_per_100K_pop') fmt(%3.0f)
	
project, creates("${root}/results/paper numbers/Covid/Mean National Covid Cases Oct-Dec 2021 per 100K pop.yaml")
